# Importing necessary libraries
import pandas as pd # To read the data set
import numpy as np # Importing numpy library
import seaborn as sns # For data visualization
import matplotlib.pyplot as plt # Necessary library for plotting graphs
sns.set(color_codes = True)
%matplotlib inline
from sklearn import metrics # Importing metrics
from sklearn.metrics import classification_report, accuracy_score, f1_score, recall_score, confusion_matrix, average_precision_score, roc_auc_score
from sklearn.model_selection import train_test_split # Splitting data train and test set
from sklearn.linear_model import LogisticRegression # For Logistic Reg model building
from sklearn.naive_bayes import GaussianNB # For Naive Bayes model building
from sklearn import svm # For SVM model building
from sklearn.neighbors import KNeighborsClassifier # For KNN model building
from sklearn.tree import plot_tree # For visual representation of decision trees
from sklearn.tree import DecisionTreeClassifier # For Decision Tree model building
from sklearn.ensemble import RandomForestClassifier # For Random Forest model building
from sklearn.ensemble import BaggingClassifier # For Bagging model building
from sklearn.ensemble import AdaBoostClassifier # For Adaboost model building
from sklearn.ensemble import GradientBoostingClassifier # For Gradientboost model building
from scipy.stats import zscore # Importing to find zscore
from sklearn import preprocessing # Importing to standardize data
from sklearn.impute import SimpleImputer # Importing to fill in null values in the data
# Running steps to check and prepare the data.
# NOTE(review): these bare expressions only render output in a notebook;
# in a plain script they would need print() to be visible.
df = pd.read_csv('Data Parkinsons', delimiter =',', engine = 'python')
df.head()                    # first 5 rows
df.tail()                    # last 5 rows
df.shape                     # (rows, columns)
df.size                      # total number of cells
df.isnull().sum()            # null count per column
df.count()                   # non-null count per column
df.dtypes                    # dtype per column
df.info()                    # dtypes + memory usage summary
df.describe().transpose()    # 5-point summary per feature
df.skew()                    # skewness per numeric feature
1. The dataset consists of 195 patients & 24 features.
2. On checking for lapses in the dataset we can conclude that the data does not have any null values & does not require any major cleaning.
3. Our analysis above shows all the features linked to the patients are reasonably skewed.
4. We also notice from the 5-point summary table that there is a lot of variation in the units of the data, and eventually we would need to scale it to arrive at a relatively clearer and more realistic accuracy.
# Converting status into float64 in order to have all data in one standard type.
# status is the target: 1 = Parkinson's, 0 = healthy (per the value counts below).
df['status'] = df['status'].astype('float64')
df['status'].value_counts()  # class balance of the target
There are more diseased people in our dataset.
# Visual representation of respective data field features:
# histograms of the three vocal fundamental-frequency features.
# NOTE(review): sns.distplot is deprecated (removed in seaborn >= 0.14);
# sns.histplot(..., kde=True) is the modern equivalent — confirm seaborn version.
fig, ax = plt.subplots(1,3, figsize=(18,9))
sns.distplot(df['MDVP:Fo(Hz)'], bins = 20, ax = ax[0])
sns.distplot(df['MDVP:Fhi(Hz)'], bins = 20, ax = ax[1])
sns.distplot(df['MDVP:Flo(Hz)'], bins = 20, ax = ax[2]);
Histogram of features with ("vocal" fundamental frequency). Visualization through the histogram above shows positive skewness in data.
# Plotting a graph for visual analysis of feature MDVP:Fo(Hz),
# overlaying the per-class distributions (green = healthy, red = Parkinson's).
plt.figure(figsize=(10,6))
sns.distplot(df[df['status'] == 0]['MDVP:Fo(Hz)'],kde=True, color='g', label = 'status = 0')
sns.distplot(df[df['status'] == 1]['MDVP:Fo(Hz)'], kde=True, color='r', label = 'status = 1')
plt.legend()
plt.title('MDVP:Fo(Hz) Distribution');
# Boxplot visual analysis of feature MDVP:Fo(Hz), split by target class
plt.figure(figsize=(7,5))
sns.boxplot(x='status', y='MDVP:Fo(Hz)', data=df);
# Plotting a graph for visual analysis of feature MDVP:Fhi(Hz)
# (per-class overlays: green = healthy, red = Parkinson's).
plt.figure(figsize=(10,6))
sns.distplot(df[df['status'] == 0]['MDVP:Fhi(Hz)'], kde=True, color='g', label = 'status = 0')
sns.distplot(df[df['status'] == 1]['MDVP:Fhi(Hz)'], kde=True, color='r', label = 'status = 1')
plt.legend()
plt.title('MDVP:Fhi(Hz) Distribution');
# Boxplot visual analysis of feature MDVP:Fhi(Hz)
plt.figure(figsize=(7,5))
sns.boxplot(x = 'status', y = 'MDVP:Fhi(Hz)', data=df);
# Plotting a graph for visual analysis of feature MDVP:Flo(Hz)
plt.figure(figsize=(10,6))
sns.distplot(df[df['status'] == 0]['MDVP:Flo(Hz)'], kde=True, color = 'g', label = 'status = 0')
sns.distplot(df[df['status'] == 1]['MDVP:Flo(Hz)'], kde=True, color = 'r', label = 'status = 1')
plt.legend()
plt.title('MDVP:Flo(Hz) Distribution');
# Boxplot visual analysis of feature MDVP:Flo(Hz)
plt.figure(figsize=(7,5))
sns.boxplot(x = 'status', y = 'MDVP:Flo(Hz)', data=df);
Peaks of the distributions do not coincide, and there is no clear separation in the frequency distributions of healthy and ill people.
# Visual representation of respective data field features:
# histograms of the five jitter features (variation in fundamental frequency).
# NOTE: only 5 of the 2x3 axes are used; ax[1,2] stays empty.
fig, ax = plt.subplots(2,3, figsize=(20,18))
sns.distplot(df['MDVP:Jitter(%)'], bins = 25, ax = ax[0,0])
sns.distplot(df['MDVP:Jitter(Abs)'], bins = 25, ax = ax[0,1])
sns.distplot(df['MDVP:RAP'], bins = 25, ax = ax[0,2])
sns.distplot(df['MDVP:PPQ'], bins = 25, ax = ax[1,0])
sns.distplot(df['Jitter:DDP'], bins = 25, ax = ax[1,1]);
Histogram of features with ("variation" in fundamental frequency). Visualization through the histogram above shows positive skewness in data.
# Per-class distribution plot + boxplot for each jitter feature, then
# histograms of the shimmer (amplitude-variation) features.
# Green = healthy (status 0), red = Parkinson's (status 1) throughout.
# Plotting a graph for visual analysis of feature MDVP:Jitter(%)
plt.figure(figsize=(10,6))
sns.distplot(df[df['status'] == 0]['MDVP:Jitter(%)'], kde=True, color='g', label = 'status = 0')
sns.distplot(df[df['status'] == 1]['MDVP:Jitter(%)'], kde=True, color='r', label = 'status = 1')
plt.legend()
plt.title('MDVP:Jitter(%) Distribution');
# Boxplot visual analysis of feature MDVP:Jitter(%)
plt.figure(figsize=(7,5))
sns.boxplot(x = 'status', y = 'MDVP:Jitter(%)', data=df);
# Plotting a graph for visual analysis of feature MDVP:Jitter(Abs)
plt.figure(figsize=(10,6))
sns.distplot(df[df['status'] == 0]['MDVP:Jitter(Abs)'], kde=True, color='g', label = 'status = 0')
sns.distplot(df[df['status'] == 1]['MDVP:Jitter(Abs)'], kde=True, color='r', label = 'status = 1')
plt.legend()
plt.title('MDVP:Jitter(Abs) Distribution');
# Boxplot visual analysis of feature MDVP:Jitter(Abs)
plt.figure(figsize=(7,5))
sns.boxplot(x = 'status', y = 'MDVP:Jitter(Abs)', data=df);
# Plotting a graph for visual analysis of feature MDVP:RAP
plt.figure(figsize=(10,6))
sns.distplot(df[df['status'] == 0]['MDVP:RAP'], kde=True, color = 'g', label = 'status = 0')
sns.distplot(df[df['status'] == 1]['MDVP:RAP'], kde=True, color = 'r', label = 'status = 1')
plt.legend()
plt.title('MDVP:RAP Distribution');
# Boxplot visual analysis of feature MDVP:RAP
plt.figure(figsize=(7,5))
sns.boxplot(x = 'status', y = 'MDVP:RAP', data=df);
# Plotting a graph for visual analysis of feature MDVP:PPQ
plt.figure(figsize=(10,6))
sns.distplot(df[df['status'] == 0]['MDVP:PPQ'], kde=True, color = 'g', label = 'status = 0')
sns.distplot(df[df['status'] == 1]['MDVP:PPQ'], kde=True, color = 'r', label = 'status = 1')
plt.legend()
plt.title('MDVP:PPQ Distribution');
# Boxplot visual analysis of feature MDVP:PPQ
plt.figure(figsize=(7,5))
sns.boxplot(x = 'status', y = 'MDVP:PPQ', data=df);
# Plotting a graph for visual analysis of feature Jitter:DDP
plt.figure(figsize=(10,6))
sns.distplot(df[df['status'] == 0]['Jitter:DDP'], kde=True, color = 'g', label = 'status = 0')
sns.distplot(df[df['status'] == 1]['Jitter:DDP'], kde=True, color = 'r', label = 'status = 1')
plt.legend()
plt.title('Jitter:DDP Distribution');
# Boxplot visual analysis of feature Jitter:DDP
plt.figure(figsize=(7,5))
sns.boxplot(x = 'status', y = 'Jitter:DDP', data=df);
# Visual representation of the six shimmer (amplitude-variation) features
fig, ax = plt.subplots(2,3, figsize=(20,18))
sns.distplot(df['MDVP:Shimmer'], bins = 25, ax = ax[0,0])
sns.distplot(df['MDVP:Shimmer(dB)'], bins = 25, ax = ax[0,1])
sns.distplot(df['Shimmer:APQ3'], bins = 25, ax = ax[0,2])
sns.distplot(df['Shimmer:APQ5'], bins =25, ax = ax[1,0])
sns.distplot(df['MDVP:APQ'], bins = 25, ax = ax[1,1])
sns.distplot(df['Shimmer:DDA'], bins = 25, ax = ax[1,2]);
Histogram of features, (several measures of variation in amplitude). Visualization through the histogram above shows positive skewness in data.
# Plotting a graph for visual analysis of feature MDVP:Shimmer
# (per-class overlays: green = healthy, red = Parkinson's).
plt.figure(figsize=(10,6))
sns.distplot(df[df['status'] == 0]['MDVP:Shimmer'], kde=True, color = 'g', label = 'status = 0')
sns.distplot(df[df['status'] == 1]['MDVP:Shimmer'], kde=True, color = 'r', label = 'status = 1')
plt.legend()
plt.title('MDVP:Shimmer Distribution');  # fixed typo: was 'DIstribution'
# Boxplot visual analysis of feature MDVP:Shimmer
plt.figure(figsize=(7,5))
sns.boxplot(x = 'status', y = 'MDVP:Shimmer', data=df);
# Per-class distribution plot + boxplot for each remaining shimmer feature,
# followed by histograms of the two noise-ratio features (NHR, HNR).
# Green = healthy (status 0), red = Parkinson's (status 1) throughout.
# Plotting a graph for visual analysis of feature MDVP:Shimmer(dB)
plt.figure(figsize=(10,6))
sns.distplot(df[df['status'] == 0]['MDVP:Shimmer(dB)'], kde=True, color = 'g', label = 'status = 0')
sns.distplot(df[df['status'] == 1]['MDVP:Shimmer(dB)'], kde=True, color = 'r', label = 'status = 1')
plt.legend()
plt.title('MDVP:Shimmer(dB) Distribution');
# Boxplot visual analysis of feature MDVP:Shimmer(dB)
plt.figure(figsize=(7,5))
sns.boxplot(x = 'status', y = 'MDVP:Shimmer(dB)', data=df);
# Plotting a graph for visual analysis of feature Shimmer:APQ3
plt.figure(figsize=(10,6))
sns.distplot(df[df['status'] == 0]['Shimmer:APQ3'], kde=True, color = 'g', label = 'status = 0')
sns.distplot(df[df['status'] == 1]['Shimmer:APQ3'], kde=True, color = 'r', label = 'status = 1')
plt.legend()
plt.title('Shimmer:APQ3 Distribution');
# Boxplot visual analysis of feature Shimmer:APQ3
plt.figure(figsize=(7,5))
sns.boxplot(x = 'status', y = 'Shimmer:APQ3', data=df);
# Plotting a graph for visual analysis of feature Shimmer:APQ5
plt.figure(figsize=(10,6))
sns.distplot(df[df['status'] == 0]['Shimmer:APQ5'], kde=True, color = 'g', label = 'status = 0')
sns.distplot(df[df['status'] == 1]['Shimmer:APQ5'], kde=True, color = 'r', label = 'status = 1')
plt.legend()
plt.title('Shimmer:APQ5 Distribution');
# Boxplot visual analysis of feature Shimmer:APQ5
plt.figure(figsize=(7,5))
sns.boxplot(x = 'status', y = 'Shimmer:APQ5', data=df);
# Plotting a graph for visual analysis of feature MDVP:APQ
plt.figure(figsize=(10,6))
sns.distplot(df[df['status'] == 0]['MDVP:APQ'], kde=True, color = 'g', label = 'status = 0')
sns.distplot(df[df['status'] == 1]['MDVP:APQ'], kde=True, color = 'r', label = 'status = 1')
plt.legend()
plt.title('MDVP:APQ Distribution');
# Boxplot visual analysis of feature MDVP:APQ
plt.figure(figsize=(7,5))
sns.boxplot(x = 'status', y = 'MDVP:APQ', data=df);
# Plotting a graph for visual analysis of feature Shimmer:DDA
plt.figure(figsize=(10,6))
sns.distplot(df[df['status'] == 0]['Shimmer:DDA'], kde=True, color = 'g', label = 'status = 0')
sns.distplot(df[df['status'] == 1]['Shimmer:DDA'], kde=True, color = 'r', label = 'status = 1')
plt.legend()
plt.title('Shimmer:DDA Distribution');
# Boxplot visual analysis of feature Shimmer:DDA
plt.figure(figsize=(7,5))
sns.boxplot(x = 'status', y = 'Shimmer:DDA', data=df);
# Histograms of the two noise-to-tonal ratio measures
fig, ax = plt.subplots(1,2, figsize=(18,9))
sns.distplot(df['NHR'], bins = 25, ax = ax[0])
sns.distplot(df['HNR'], bins = 25, ax = ax[1]);
Visualization through the histograms above shows positive skewness in (NHR) and slightly negative skewness in (HNR).
# Plotting a graph for visual analysis of feature NHR
# (per-class overlays: green = healthy, red = Parkinson's).
plt.figure(figsize=(10,6))
sns.distplot(df[df['status'] == 0]['NHR'], kde=True, color = 'g', label = 'status = 0')
sns.distplot(df[df['status'] == 1]['NHR'], kde=True, color = 'r', label = 'status = 1')
plt.legend()
plt.title('NHR Distribution');
Healthy people have a very high peak around (NHR = 0.1).
People with (NHR > 0.2) have a higher probability of having Parkinson's disease.
# Boxplot visual analysis of feature NHR, split by target class
plt.figure(figsize=(7,5))
sns.boxplot(x = 'status', y = 'NHR', data=df);
Individuals with Parkinsons Disease have a wider range.
# Plotting a graph for visual analysis of feature HNR
# (per-class overlays: green = healthy, red = Parkinson's).
plt.figure(figsize=(10,6))
sns.distplot(df[df['status'] == 0]['HNR'], kde=True, color = 'g', label = 'status = 0')
sns.distplot(df[df['status'] == 1]['HNR'], kde=True, color = 'r', label = 'status = 1')
plt.legend()
plt.title('HNR Distribution');
# Boxplot visual analysis of feature HNR
plt.figure(figsize=(7,5))
sns.boxplot(x = 'status', y = 'HNR', data=df);
Individuals with HNR in the range of (18-25) have a higher probability of having Parkinson's disease.
# Visual representation of the two nonlinear dynamical-complexity features
fig, ax = plt.subplots(1,2, figsize=(18,9))
sns.distplot(df['RPDE'], bins = 25, ax = ax[0])
sns.distplot(df['D2'], bins = 25, ax = ax[1]);
Visualization through the histogram above shows us that (D2) almost resembles a normal distribution & (RPDE) is positively skewed.
# Per-class distribution plot + boxplot for RPDE and D2
# (green = healthy, red = Parkinson's).
# Plotting a graph for visual analysis of feature RPDE
plt.figure(figsize=(10,6))
sns.distplot(df[df['status'] == 0]['RPDE'], kde=True, color = 'g', label = 'status = 0')
sns.distplot(df[df['status'] == 1]['RPDE'], kde=True, color = 'r', label = 'status = 1')
plt.legend()
plt.title('RPDE Distribution');
# Boxplot visual analysis of feature RPDE
plt.figure(figsize=(7,5))
sns.boxplot(x = 'status', y = 'RPDE', data=df);
# Plotting a graph for visual analysis of feature D2
plt.figure(figsize=(10,6))
sns.distplot(df[df['status'] == 0]['D2'], kde=True, color = 'g', label = 'status = 0')
sns.distplot(df[df['status'] == 1]['D2'], kde=True, color = 'r', label = 'status = 1')
plt.legend()
plt.title('D2 Distribution');
# Boxplot visual analysis of feature D2
plt.figure(figsize=(7,5))
sns.boxplot(x = 'status', y = 'D2', data=df);
Peak of distribution very close and dispersion range very similar in terms of healthy individuals and those with parkinsons disease.
# Visual representation of the frequency-variation features spread1/spread2
# and the pitch period entropy (PPE)
fig, ax = plt.subplots(1,3, figsize=(18,10))
sns.distplot(df['spread1'], bins = 25, ax = ax[0])
sns.distplot(df['spread2'], bins = 25, ax = ax[1])
sns.distplot(df['PPE'], bins = 25, ax = ax[2]);
Visualization through the histograms above shows us that (spread1) & (spread2) are almost normally distributed, whereas (PPE) is positively skewed.
# Plotting a graph for visual analysis of feature spread1
# (per-class overlays: green = healthy, red = Parkinson's).
plt.figure(figsize=(10,6))
sns.distplot(df[df['status'] == 0]['spread1'], kde=True, color = 'g', label = 'status = 0')
sns.distplot(df[df['status'] == 1]['spread1'], kde=True, color = 'r', label = 'status = 1')
plt.legend()
plt.title('spread1 Distribution');
People with spread1 value (< - 5.0) have higher probability of getting parkinsons disease.
# Boxplot visual analysis of feature spread1, split by target class
plt.figure(figsize=(7,5))
sns.boxplot(x = 'status', y = 'spread1', data=df);
# Plotting a graph for visual analysis of feature spread2
plt.figure(figsize=(10,6))
sns.distplot(df[df['status'] == 0]['spread2'], kde=True, color = 'g', label = 'status = 0')
sns.distplot(df[df['status'] == 1]['spread2'], kde=True, color = 'r', label = 'status = 1')
plt.legend()
plt.title('spread2 Distribution');
# Boxplot visual analysis of feature spread2
plt.figure(figsize=(7,5))
sns.boxplot(x = 'status', y = 'spread2', data=df);
Frequency variation range of people with parkinsons is more and wide spread, even though peaks of distributions are quite close.
# Plotting a graph for visual analysis of feature PPE
# (per-class overlays: green = healthy, red = Parkinson's).
plt.figure(figsize=(10,6))
sns.distplot(df[df['status'] == 0]['PPE'], kde=True, color = 'g', label = 'status = 0')
sns.distplot(df[df['status'] == 1]['PPE'], kde=True, color = 'r', label = 'status = 1')
plt.legend()
plt.title('PPE Distribution');
The peak of the distribution for healthy people is at (0.1) and for people with Parkinson's it is between (0.2 - 0.25).
Although the peak for healthy people is double the size of the peak for people with Parkinson's, there are fewer healthy people in the sample population.
# Boxplot visual analysis of feature PPE, split by target class
plt.figure(figsize=(7,5))
sns.boxplot(x = 'status', y = 'PPE', data=df);
PPE of healthy people end near the median of PPE of people with Parkinsons(at around 0.2)
Individuals with PPE value (> 0.2) have high probability of having parkinsons disease.
df.corr() # Finding the correlation amongst different data features
# Visualizing through a graph the different correlations in the data
plt.figure(figsize=(16,18))
corr = df.corr()
sns.heatmap(corr, annot= True, vmin=-1, vmax=1, cmap = 'Pastel1');
# Finding highly correlated features: mask hides every cell with
# |correlation| < 0.8 so only the strongly correlated pairs remain visible.
corr_pos = corr.abs()
mask = (corr_pos < 0.8)
fig, ax = plt.subplots(figsize=[16,18])
sns.heatmap(corr, annot = True, vmin = -1, vmax = 1, center = 0, mask = mask, cmap = 'Pastel1');
# Visualizing a pair plot for the data, colored by the target class.
sns.pairplot(df, diag_kind='kde', hue = 'status');
# Train and Test split: x = all predictors, y = the binary target 'status'.
# 'name' is a patient identifier, so it is dropped from the predictors.
x = df.drop(['status', 'name'], axis=1) # Dropping certain fields from independent variables
y = df[['status']]
# 70/30 split; random_state fixed for reproducibility.
x_train, x_test, y_train, y_test = train_test_split(x,y, random_state = 1, test_size = 0.30)
# Checking split data into train and test data sets
print('{0:2.2f}% Data in Train Set'.format((len(x_train)/len(df.index))*100))
print('{0:2.2f}% Data in Test Set'. format((len(x_test)/len(df.index))*100))
x_train.head()
x_train.describe()
# Standardize the features: fit the scaler on the TRAINING data only, then
# apply the same fitted transform to the test data. The original code called
# fit_transform on x_test as well, which re-fits the scaler on test-set
# statistics — leaking test information and scaling train/test inconsistently.
scaler = preprocessing.StandardScaler() # Creating the scaler object
x_train_scaled = scaler.fit_transform(x_train) # Fit on train, then transform train
x_test_scaled = scaler.transform(x_test) # Transform test with the train statistics
x_train.isna().sum() # Confirm no missing values before modelling
We can see from the above result that the data has no missing values, hence we will go ahead with just scaling the dataset.
# Logistic Regression baseline; max_iter raised so the solver converges.
logmod = LogisticRegression(max_iter = 1000)
logmod.fit(x_train_scaled, y_train.values.ravel()) # Fitting model on training data set
logmod.score(x_train_scaled, y_train) # Training data score
# Report the fitted coefficient for every feature. The print call is the
# loop body and must be indented — the flattened export had lost the
# indentation, which is a SyntaxError in plain Python.
for idx, col_name in enumerate(x_train.columns):
    print('The Coefficient for {} is {}'.format(col_name, logmod.coef_[0][idx]))
Coefficient of each variable is described above.
# Evaluate the logistic model on the held-out test set.
intercept = logmod.intercept_[0]
print('The intercept of our model is {}'.format(intercept)) # Finding the intercept of our model
y_pred = logmod.predict(x_test_scaled)
logmod_acc = accuracy_score(y_test, y_pred) # Getting accuracy score of model on test data
logmod_F1 = f1_score(y_test, y_pred) # Getting f1 score of the model
logmod_AUC = roc_auc_score(y_test, y_pred) # Getting area under curve score
logmod_rec = recall_score(y_test, y_pred) # Getting recall score of the model
logmod_conf = confusion_matrix(y_test, y_pred) # Computing the confusion matrix
logmod_classrep = classification_report(y_test, y_pred) # Building the classification report
# Print the logistic-regression evaluation summary.
print('The Accuracy Score of the model is {0:.2f}%'.format(logmod_acc*100))
print()
print('The F1 score of the model is {0:.2f}%'.format(logmod_F1*100))
print()
print('The Recall score of the model is {0:.2f}%'.format(logmod_rec*100))
print()
print('AUC score of the model is {0:.2f}%'.format(logmod_AUC*100))
print()
print('The Confusion Matrix of the model is :\n', logmod_conf)
print()
# Fixed typo in the output message: was 'Deatiled'.
print('Detailed Classification Report of the model is :\n', logmod_classrep)
# Plotting confusion matrix graphically: rows = true class (0/1),
# columns = predicted class relabelled as Healthy/Parkinsons.
cm = metrics.confusion_matrix(y_test, y_pred, labels = [0,1])
df_cm = pd.DataFrame(cm, index = [i for i in ['0', '1']],
columns = [i for i in ['Healthy', 'Parkinsons']])
plt.figure(figsize=(8,5))
sns.heatmap(df_cm, annot=True, cmap='Pastel1', fmt='g');
# K-Nearest Neighbours classifier.
# NOTE(review): n_neighbors=1 with weights='distance' guarantees a perfect
# training score (each point is its own nearest neighbour) — consider
# tuning k via cross-validation.
KNNmod = KNeighborsClassifier(n_neighbors = 1, weights = 'distance')
KNNmod.fit(x_train_scaled, y_train.values.ravel()) # Fitting model on training data set
KNNmod.score(x_train_scaled, y_train) # Training data score
ypred = KNNmod.predict(x_test_scaled)
KNNmod_acc = accuracy_score(y_test, ypred) # Getting accuracy score of the model on test data
KNNmod_F1 = f1_score(y_test, ypred) # Getting f1 score of the model
KNNmod_rec = recall_score(y_test, ypred) # Getting recall score of the model
KNNmod_AUC = roc_auc_score(y_test, ypred) # Finding score of area under curve
KNNmod_conf = confusion_matrix(y_test, ypred) # Computing the confusion matrix of the model
KNNmod_class = classification_report(y_test, ypred) # Building the classification report of the model
print('The Accuracy Score of the model is {0:.2f}%'. format(KNNmod_acc*100))
print()
print('The F1 Score of the model is {0:.2f}%'. format(KNNmod_F1*100))
print()
print('The Recall Score of the model is {0:.2f}%'. format(KNNmod_rec*100))
print()
print('AUC score of the model is {0:.2f}%'. format(KNNmod_AUC*100))
print()
print('The Confusion Matrix of the model is :\n', KNNmod_conf)
print()
print('Detailed Classification Report of the model is :\n', KNNmod_class)
# Plotting confusion matrix graphically: rows = true class (0/1),
# columns = predicted class. Fixed typo in column label: was 'Healhty'.
cm1 = metrics.confusion_matrix(y_test, ypred, labels = [0,1])
df_cm1 = pd.DataFrame(cm1, index = ['0', '1'],
                      columns = ['Healthy', 'Parkinsons'])
plt.figure(figsize=(8,5))
sns.heatmap(df_cm1, annot=True, cmap = 'Pastel1', fmt='g');
# Gaussian Naive Bayes classifier, evaluated on the held-out test set.
gaumod = GaussianNB()
gaumod.fit(x_train_scaled, y_train.values.ravel()) # Fitting model on training data set
gaumod.score(x_train_scaled, y_train) # Training data score
ypd = gaumod.predict(x_test_scaled)
gaumod_acc = accuracy_score(y_test, ypd) # Getting accuracy score of model on test data
gaumod_F1 = f1_score(y_test, ypd) # Getting f1 score of the model
gaumod_rec = recall_score(y_test, ypd) # Getting recall score of the model
gaumod_AUC = roc_auc_score(y_test, ypd) # Finding score of area under curve
gaumod_conf = confusion_matrix(y_test, ypd) # Computing the confusion matrix of the model
gaumod_class = classification_report(y_test, ypd) # Building the classification report of the model
print('The Accuracy Score of the model is {0:.2f}%'. format(gaumod_acc*100))
print()
print('The F1 Score of the model is {0:.2f}%'. format(gaumod_F1*100))
print()
print('The Recall Score of the model is {0:.2f}%'. format(gaumod_rec*100))
print()
print('AUC score of the model is {0:.2f}%'. format(gaumod_AUC*100))
print()
print('The Confusion Matrix of the model is :\n', gaumod_conf)
print()
print('The Classification Report of the model is :\n', gaumod_class)
# Plotting confusion matrix graphically (rows = true class, columns = predicted)
cm2 = metrics.confusion_matrix(y_test, ypd, labels=[0,1])
df_cm2 = pd.DataFrame(cm2, index = [i for i in ['0','1']],
columns = [i for i in ['Healthy','Parkinsons']])
plt.figure(figsize=(8,5))
sns.heatmap(df_cm2, annot=True, cmap = 'Pastel2', fmt='g');
# SVM with a linear kernel.
# NOTE(review): the gamma parameter has no effect for kernel='linear'
# (it only applies to rbf/poly/sigmoid) — confirm against sklearn SVC docs.
Svmlinear = svm.SVC(C = 3, gamma = 0.025, kernel='linear')
Svmlinear.fit(x_train_scaled, y_train.values.ravel()) # Fitting model on training data set
Svmlinear.score(x_train_scaled, y_train) # Training data score
ypre = Svmlinear.predict(x_test_scaled)
Svmlinear_acc = accuracy_score(y_test, ypre) # Getting accuracy score of model on test data
Svmlinear_F1 = f1_score(y_test, ypre) # Getting f1 score of the model
Svmlinear_rec = recall_score(y_test, ypre) # Getting recall score of the model
Svmlinear_AUC = roc_auc_score(y_test, ypre) # Finding score of area under curve
Svmlinear_conf = confusion_matrix(y_test, ypre) # Computing the confusion matrix of the model
Svmlinear_class = classification_report(y_test, ypre) # Building the classification report of the model
print('The Accuracy Score of the model is {0:.2f}%'. format(Svmlinear_acc*100))
print()
print('The F1 Score of the model is {0:.2f}%'. format(Svmlinear_F1*100))
print()
print('The Recall Score of the model is {0:.2f}%'. format(Svmlinear_rec*100))
print()
print('AUC Score of the model is {0:.2f}%'. format(Svmlinear_AUC*100))
print()
print('The Confusion Matrix of the model is :\n', Svmlinear_conf)
print()
print('Detailed Classification Report of the model is :\n', Svmlinear_class)
# Plotting confusion matrix graphically (rows = true class, columns = predicted)
cm3 = metrics.confusion_matrix(y_test, ypre, labels = [0,1])
df_cm3 = pd.DataFrame(cm3, index = [i for i in ['0','1']],
columns = [i for i in ['Healthy','Parkinsons']])
plt.figure(figsize=(8,5))
sns.heatmap(df_cm3, annot=True, cmap='Pastel2', fmt='g');
# SVM with an RBF kernel (C and gamma appear hand-picked — consider GridSearchCV).
Svmrbf = svm.SVC(C = 70, gamma = 0.1, kernel='rbf')
Svmrbf.fit(x_train_scaled, y_train.values.ravel()) # Fitting model on training data set
Svmrbf.score(x_train_scaled, y_train) # Training data score
ypr = Svmrbf.predict(x_test_scaled)
Svmrbf_acc = accuracy_score(y_test, ypr) # Getting accuracy score of model on test data
Svmrbf_F1 = f1_score(y_test, ypr) # Getting f1 score of the model
Svmrbf_rec = recall_score(y_test, ypr) # Getting recall score of the model
Svmrbf_AUC = roc_auc_score(y_test, ypr) # Finding score of area under curve
Svmrbf_conf = confusion_matrix(y_test, ypr) # Computing the confusion matrix of the model
Svmrbf_class = classification_report(y_test, ypr) # Building the classification report of the model
print('The Accuracy Score of the model is {0:.2f}%'. format(Svmrbf_acc*100))
print()
print('The F1 Score of the model is {0:.2f}%'. format(Svmrbf_F1*100))
print()
print('The Recall Score of the model is {0:.2f}%'. format(Svmrbf_rec*100))
print()
print('AUC Score of the model is {0:.2f}%'. format(Svmrbf_AUC*100))
print()
print('The Confusion Matrix of the model is :\n', Svmrbf_conf)
print()
print('Detailed Classification Report of the model is :\n', Svmrbf_class)
# Plotting confusion matrix graphically (rows = true class, columns = predicted)
cm4 = metrics.confusion_matrix(y_test, ypr, labels = [0,1])
df_cm4 = pd.DataFrame(cm4, index = [i for i in ['0','1']],
columns = [i for i in ['Healthy','Parkinsons']])
plt.figure(figsize=(8,5))
sns.heatmap(df_cm4, annot=True, cmap='Pastel2', fmt='g');
# SVM with a polynomial kernel (default degree=3).
Svmpoly = svm.SVC(C = 50, gamma = 0.10, kernel='poly')
Svmpoly.fit(x_train_scaled, y_train.values.ravel()) # Fitting model on training data set
Svmpoly.score(x_train_scaled, y_train) # Training data score
y_pr = Svmpoly.predict(x_test_scaled)
Svmpoly_acc = accuracy_score(y_test, y_pr) # Getting accuracy score of model on test data
Svmpoly_F1 = f1_score(y_test, y_pr) # Getting f1 score of the model
Svmpoly_rec = recall_score(y_test, y_pr) # Getting recall score of the model
Svmpoly_AUC = roc_auc_score(y_test, y_pr) # Finding score of area under curve
Svmpoly_conf = confusion_matrix(y_test, y_pr) # Computing the confusion matrix of the model
Svmpoly_class = classification_report(y_test, y_pr) # Building the classification report of the model
print('The Accuracy Score of the model is {0:.2f}%'. format(Svmpoly_acc*100))
print()
print('The F1 Score of the model is {0:.2f}%'. format(Svmpoly_F1*100))
print()
print('The Recall Score of the model is {0:.2f}%'. format(Svmpoly_rec*100))
print()
print('AUC Score of the model is {0:.2f}%'. format(Svmpoly_AUC*100))
print()
print('The Confusion Matrix of the model is :\n', Svmpoly_conf)
print()
print('Detailed Classification Report of the model is :\n', Svmpoly_class)
# Plotting confusion matrix graphically (rows = true class, columns = predicted)
cm5 = metrics.confusion_matrix(y_test, y_pr, labels = [0,1])
df_cm5 = pd.DataFrame(cm5, index = [i for i in ['0','1']],
columns = [i for i in ['Healthy','Parkinsons']])
plt.figure(figsize=(8,5))
sns.heatmap(df_cm5, annot=True, cmap='Pastel2', fmt='g');
# SVM with a sigmoid kernel.
Svmsig = svm.SVC(C = 70, gamma = 0.01, kernel='sigmoid')
Svmsig.fit(x_train_scaled, y_train.values.ravel()) # Fitting model on training data set
Svmsig.score(x_train_scaled, y_train) # Training data score
y_prd = Svmsig.predict(x_test_scaled)
Svmsig_acc = accuracy_score(y_test, y_prd) # Getting accuracy score of model on test data
Svmsig_F1 = f1_score(y_test, y_prd) # Getting f1 score of the model
Svmsig_rec = recall_score(y_test, y_prd) # Getting recall score of the model
Svmsig_AUC = roc_auc_score(y_test, y_prd) # Finding score of area under curve
Svmsig_conf = confusion_matrix(y_test, y_prd) # Computing the confusion matrix of the model
Svmsig_class = classification_report(y_test, y_prd) # Building the classification report of the model
print('The Accuracy Score of the model is {0:.2f}%'. format(Svmsig_acc*100))
print()
print('The F1 Score of the model is {0:.2f}%'. format(Svmsig_F1*100))
print()
print('The Recall Score of the model is {0:.2f}%'. format(Svmsig_rec*100))
print()
print('AUC Score of the model is {0:.2f}%'. format(Svmsig_AUC*100))
print()
print('The Confusion Matrix of the model is :\n', Svmsig_conf)
print()
print('Detailed Classification Report of the model is :\n', Svmsig_class)
# Plotting confusion matrix graphically (rows = true class, columns = predicted)
cm6 = metrics.confusion_matrix(y_test, y_prd, labels = [0,1])
df_cm6 = pd.DataFrame(cm6, index = [i for i in ['0','1']],
columns = [i for i in ['Healthy','Parkinsons']])
plt.figure(figsize=(8,5))
sns.heatmap(df_cm6, annot=True, cmap='Pastel2', fmt='g');
# Decision Tree classifier.
# NOTE(review): max_depth=1 is a decision stump (a single split) — likely
# too shallow; consider tuning max_depth.
dtmod = DecisionTreeClassifier(criterion = 'gini', random_state=1, max_depth = 1)
dtmod.fit(x_train_scaled, y_train.values.ravel()) # Fitting model on training data
dtmod.score(x_train_scaled, y_train) # Training data score
y_pre = dtmod.predict(x_test_scaled)
dtmod_acc = accuracy_score(y_test, y_pre) # Getting accuracy score of model on test data
dtmod_F1 = f1_score(y_test, y_pre) # Getting f1 score of the model
dtmod_rec = recall_score(y_test, y_pre) # Getting recall score of the model
dtmod_AUC = roc_auc_score(y_test, y_pre) # Finding score of area under curve
dtmod_conf = confusion_matrix(y_test, y_pre) # Computing the confusion matrix of the model
dtmod_class = classification_report(y_test, y_pre) # Building the classification report of the model
print('The Accuracy score of the model is {0:.2f}%'. format(dtmod_acc*100))
print()
print('The F1 score of the model is {0:.2f}%'. format(dtmod_F1*100))
print()
print('The Recall score of the model is {0:.2f}%'. format(dtmod_rec*100))
print()
print('AUC Score of the model is {0:.2f}%'. format(dtmod_AUC*100))
print()
print('The Confusion Matrix of the model is :\n', dtmod_conf)
print()
print('The Classification Report of the model is :\n', dtmod_class)
# Visualizing the data graphically in a tree form (a separate depth-3 tree
# fitted just for the picture; dtmod above stays untouched).
from sklearn.tree import plot_tree
clf = DecisionTreeClassifier(max_depth=3)
clf.fit(x_train_scaled, y_train)
plt.figure(figsize=(16,10))
# Bug fix: 'df_feature_names' was never defined anywhere (NameError).
# The feature names are the columns of x_train — x_train_scaled itself is a
# bare ndarray with no column labels.
a = plot_tree(clf, feature_names = list(x_train.columns), filled = True, rounded =True, fontsize =14)
# Importance of features in the tree building
print(pd.DataFrame(dtmod.feature_importances_, columns=['imp'], index = x_train.columns))
# Plotting confusion matrix graphically
cm7 = metrics.confusion_matrix(y_test, y_pre, labels=[0,1])
df_cm7 = pd.DataFrame(cm7, index = [i for i in ['0','1']],
columns = [i for i in ['Healthy','Parkinsons']])
plt.figure(figsize=(8,5))
sns.heatmap(df_cm7, annot=True, cmap='Pastel2', fmt='g');
# --- Bagging ensemble (6 base estimators): fit and evaluate ---
bgcl = BaggingClassifier(n_estimators=6, random_state=1)
bgcl.fit(x_train_scaled, y_train.values.ravel())  # Train on the scaled training split
bgcl.score(x_train_scaled, y_train)  # Accuracy on the training split (notebook display)
bag_pred = bgcl.predict(x_test_scaled)  # Hold-out predictions
# Test-set metrics for the bagging model
bgcl_acc = accuracy_score(y_test, bag_pred)
bgcl_F1 = f1_score(y_test, bag_pred)
bgcl_rec = recall_score(y_test, bag_pred)
bgcl_AUC = roc_auc_score(y_test, bag_pred)
bgcl_conf = confusion_matrix(y_test, bag_pred)
bgcl_class = classification_report(y_test, bag_pred)
print('The Accuracy score of the model is {0:.2f}%'.format(100 * bgcl_acc))
print()
print('The F1 score of the model is {0:.2f}%'.format(100 * bgcl_F1))
print()
print('The Recall score of the model is {0:.2f}%'.format(100 * bgcl_rec))
print()
print('AUC Score of the model is {0:.2f}%'.format(100 * bgcl_AUC))
print()
print('The Confusion Matrix of the model is :\n', bgcl_conf)
print()
print('The Classification Report of the model is :\n', bgcl_class)
# Confusion matrix rendered as an annotated heatmap
bag_cm = metrics.confusion_matrix(y_test, bag_pred, labels=[0, 1])
bag_cm_df = pd.DataFrame(bag_cm, index=['0', '1'], columns=['Healthy', 'Parkinsons'])
plt.figure(figsize=(8, 5))
sns.heatmap(bag_cm_df, annot=True, cmap='Pastel2', fmt='g');
# --- AdaBoost ensemble (18 boosting rounds): fit and evaluate ---
adcl = AdaBoostClassifier(n_estimators=18, random_state=1)
adcl.fit(x_train_scaled, y_train.values.ravel())  # Train on the scaled training split
adcl.score(x_train_scaled, y_train)  # Accuracy on the training split (notebook display)
ada_pred = adcl.predict(x_test_scaled)  # Hold-out predictions
# Test-set metrics for the AdaBoost model
adcl_acc = accuracy_score(y_test, ada_pred)
adcl_F1 = f1_score(y_test, ada_pred)
adcl_rec = recall_score(y_test, ada_pred)
adcl_AUC = roc_auc_score(y_test, ada_pred)
adcl_conf = confusion_matrix(y_test, ada_pred)
adcl_class = classification_report(y_test, ada_pred)
print('The Accuracy score of the model is {0:.2f}%'.format(100 * adcl_acc))
print()
print('The F1 score of the model is {0:.2f}%'.format(100 * adcl_F1))
print()
print('The Recall score of the model is {0:.2f}%'.format(100 * adcl_rec))
print()
print('AUC Score of the model is {0:.2f}%'.format(100 * adcl_AUC))
print()
print('The Confusion Matrix of the model is :\n', adcl_conf)
print()
print('The Classification Report of the model is :\n', adcl_class)
# Confusion matrix rendered as an annotated heatmap
ada_cm = metrics.confusion_matrix(y_test, ada_pred, labels=[0, 1])
ada_cm_df = pd.DataFrame(ada_cm, index=['0', '1'], columns=['Healthy', 'Parkinsons'])
plt.figure(figsize=(8, 5))
sns.heatmap(ada_cm_df, annot=True, cmap='Pastel2', fmt='g');
# --- Gradient Boosting ensemble (95 boosting rounds): fit and evaluate ---
gbcl = GradientBoostingClassifier(n_estimators=95, random_state=1)
gbcl.fit(x_train_scaled, y_train.values.ravel())  # Train on the scaled training split
gbcl.score(x_train_scaled, y_train)  # Accuracy on the training split (notebook display)
gb_pred = gbcl.predict(x_test_scaled)  # Hold-out predictions
# Test-set metrics for the gradient boosting model
gbcl_acc = accuracy_score(y_test, gb_pred)
gbcl_F1 = f1_score(y_test, gb_pred)
gbcl_rec = recall_score(y_test, gb_pred)
gbcl_AUC = roc_auc_score(y_test, gb_pred)
gbcl_conf = confusion_matrix(y_test, gb_pred)
gbcl_class = classification_report(y_test, gb_pred)
print('The Accuracy score of the model is {0:.2f}%'.format(100 * gbcl_acc))
print()
print('The F1 score of the model is {0:.2f}%'.format(100 * gbcl_F1))
print()
print('The Recall score of the model is {0:.2f}%'.format(100 * gbcl_rec))
print()
print('AUC Score of the model is {0:.2f}%'.format(100 * gbcl_AUC))
print()
print('The Confusion Matrix of the model is :\n', gbcl_conf)
print()
print('The Classification Report of the model is :\n', gbcl_class)
# Confusion matrix rendered as an annotated heatmap
gb_cm = metrics.confusion_matrix(y_test, gb_pred, labels=[0, 1])
gb_cm_df = pd.DataFrame(gb_cm, index=['0', '1'], columns=['Healthy', 'Parkinsons'])
plt.figure(figsize=(8, 5))
sns.heatmap(gb_cm_df, annot=True, cmap='Pastel2', fmt='g');
# --- Random Forest (50 trees, 3 features per split): fit and evaluate ---
rfmod = RandomForestClassifier(n_estimators=50, max_features=3, random_state=1)
rfmod.fit(x_train_scaled, y_train.values.ravel())  # Train on the scaled training split
rfmod.score(x_train_scaled, y_train)  # Accuracy on the training split (notebook display)
rf_pred = rfmod.predict(x_test_scaled)  # Hold-out predictions
# Test-set metrics for the random forest
rfmod_acc = accuracy_score(y_test, rf_pred)
rfmod_F1 = f1_score(y_test, rf_pred)
rfmod_rec = recall_score(y_test, rf_pred)
rfmod_AUC = roc_auc_score(y_test, rf_pred)
rfmod_conf = confusion_matrix(y_test, rf_pred)
rfmod_class = classification_report(y_test, rf_pred)
print('The Accuracy score of the model is {0:.2f}%'.format(100 * rfmod_acc))
print()
print('The F1 score of the model is {0:.2f}%'.format(100 * rfmod_F1))
print()
print('The Recall score of the model is {0:.2f}%'.format(100 * rfmod_rec))
print()
print('AUC Score of the model is {0:.2f}%'.format(100 * rfmod_AUC))
print()
print('The Confusion Matrix of the model is :\n', rfmod_conf)
print()
print('The Classification Report of the model is :\n', rfmod_class)
# Visualizing the data graphically in a tree form.
# NOTE(review): this fits a SEPARATE depth-3 DecisionTreeClassifier for the
# picture; it is not one of the forest's estimators — confirm that is intended.
from sklearn.tree import plot_tree
clf1 = DecisionTreeClassifier(max_depth=3)
clf1.fit(x_train_scaled, y_train)  # Fit the illustrative tree on the training data
plt.figure(figsize=(16, 10))
b = plot_tree(clf1, feature_names=df_feature_names, filled=True, rounded=True, fontsize=14)
# Confusion matrix rendered as an annotated heatmap
rf_cm = metrics.confusion_matrix(y_test, rf_pred, labels=[0, 1])
rf_cm_df = pd.DataFrame(rf_cm, index=['0', '1'], columns=['Healthy', 'Parkinsons'])
plt.figure(figsize=(8, 5))
sns.heatmap(rf_cm_df, annot=True, cmap='Pastel2', fmt='g');
# --- Stacking ensemble with a Logistic Regression meta-learner ---
from sklearn.ensemble import StackingClassifier
# Base (level-0) learners; this list is reused by the other stacking variants below
level0 = [
    ('lr', LogisticRegression()),
    ('knn', KNeighborsClassifier(n_neighbors=1, weights='distance', metric='euclidean')),
    ('bayes', GaussianNB()),
    ('svm', svm.SVC(gamma=0.1, C=70)),
]
level1 = LogisticRegression()  # Meta (level-1) learner
stack_LGR = StackingClassifier(estimators=level0, final_estimator=level1, cv=10)
stack_LGR.fit(x_train_scaled, y_train.values.ravel())  # Train on the scaled training split
stack_LGR.score(x_train_scaled, y_train)  # Accuracy on the training split (notebook display)
lgr_stack_pred = stack_LGR.predict(x_test_scaled)  # Hold-out predictions
# Test-set metrics for the stacked model
stack_LGR_acc = accuracy_score(y_test, lgr_stack_pred)
stack_LGR_F1 = f1_score(y_test, lgr_stack_pred)
stack_LGR_rec = recall_score(y_test, lgr_stack_pred)
stack_LGR_AUC = roc_auc_score(y_test, lgr_stack_pred)
stack_LGR_conf = confusion_matrix(y_test, lgr_stack_pred)
stack_LGR_class = classification_report(y_test, lgr_stack_pred)
print('The Accuracy Score of the model is {0:.2f}%'.format(100 * stack_LGR_acc))
print()
print('The F1 score of the model is {0:.2f}%'.format(100 * stack_LGR_F1))
print()
print('The Recall score of the model is {0:.2f}%'.format(100 * stack_LGR_rec))
print()
print('AUC Score of the model is {0:.2f}%'.format(100 * stack_LGR_AUC))
print()
print('The Confusion Matrix of the model is :\n', stack_LGR_conf)
print()
print('The Classification Report of the model is :\n', stack_LGR_class)
# --- Stacking ensemble with an SVM (gamma=0.1, C=70) meta-learner ---
# Reuses the level0 base learners defined for the first stacking model.
# Fix: stack_SVM_acc was computed twice on identical inputs; the duplicate
# line has been removed (no behavioral change, same final value).
level1 = svm.SVC(gamma=0.1, C=70)
stack_SVM = StackingClassifier(estimators = level0, final_estimator=level1, cv=10)
stack_SVM.fit(x_train_scaled, y_train.values.ravel()) # Fitting model on train data
stpred1 = stack_SVM.predict(x_test_scaled)
stack_SVM_acc = accuracy_score(y_test, stpred1) # Getting accuracy score on test data
stack_SVM_F1 = f1_score(y_test, stpred1) # Getting f1 score of the model
stack_SVM_rec = recall_score(y_test, stpred1) # Getting recall score of the model
stack_SVM_AUC = roc_auc_score(y_test, stpred1) # Finding score of area under curve
stack_SVM_conf = confusion_matrix(y_test, stpred1) # Confusion matrix of the model
stack_SVM_class = classification_report(y_test, stpred1) # Classification report of the model
print('The Accuracy Score of the model is {0:.2f}%'. format(stack_SVM_acc*100))
print()
print('The F1 score of the model is {0:.2f}%'. format(stack_SVM_F1*100))
print()
print('The Recall score of the model is {0:.2f}%'. format(stack_SVM_rec*100))
print()
print('AUC Score of the model is {0:.2f}%'. format(stack_SVM_AUC*100))
print()
print('The Confusion Matrix of the model is :\n', stack_SVM_conf)
print()
print('The Classification Report of the model is :\n', stack_SVM_class)
# --- Stacking ensemble with a KNN (k=3, distance-weighted) meta-learner ---
# Reuses the level0 base learners defined for the first stacking model.
level1 = KNeighborsClassifier(n_neighbors=3, weights='distance')
stack_KNN = StackingClassifier(estimators=level0, final_estimator=level1, cv=10)
stack_KNN.fit(x_train_scaled, y_train.values.ravel())  # Train on the scaled training split
knn_stack_pred = stack_KNN.predict(x_test_scaled)  # Hold-out predictions
# Test-set metrics for the stacked model
stack_KNN_acc = accuracy_score(y_test, knn_stack_pred)
stack_KNN_F1 = f1_score(y_test, knn_stack_pred)
stack_KNN_rec = recall_score(y_test, knn_stack_pred)
stack_KNN_AUC = roc_auc_score(y_test, knn_stack_pred)
stack_KNN_conf = confusion_matrix(y_test, knn_stack_pred)
stack_KNN_class = classification_report(y_test, knn_stack_pred)
print('The Accuracy Score of the model is {0:.2f}%'.format(100 * stack_KNN_acc))
print()
print('The F1 score of the model is {0:.2f}%'.format(100 * stack_KNN_F1))
print()
print('The Recall score of the model is {0:.2f}%'.format(100 * stack_KNN_rec))
print()
print('AUC Score of the model is {0:.2f}%'.format(100 * stack_KNN_AUC))
print()
print('The Confusion Matrix of the model is :\n', stack_KNN_conf)
print()
print('The Classification Report of the model is :\n', stack_KNN_class)
# --- Stacking ensemble with a Gaussian Naive Bayes meta-learner ---
# Reuses the level0 base learners defined for the first stacking model.
level1 = GaussianNB()
stack_Gau = StackingClassifier(estimators=level0, final_estimator=level1, cv=10)
stack_Gau.fit(x_train_scaled, y_train.values.ravel())  # Train on the scaled training split
gau_stack_pred = stack_Gau.predict(x_test_scaled)  # Hold-out predictions
# Test-set metrics for the stacked model
stack_Gau_acc = accuracy_score(y_test, gau_stack_pred)
stack_Gau_F1 = f1_score(y_test, gau_stack_pred)
stack_Gau_rec = recall_score(y_test, gau_stack_pred)
stack_Gau_AUC = roc_auc_score(y_test, gau_stack_pred)
stack_Gau_conf = confusion_matrix(y_test, gau_stack_pred)
stack_Gau_class = classification_report(y_test, gau_stack_pred)
print('The Accuracy Score of the model is {0:.2f}%'.format(100 * stack_Gau_acc))
print()
print('The F1 score of the model is {0:.2f}%'.format(100 * stack_Gau_F1))
print()
print('The Recall score of the model is {0:.2f}%'.format(100 * stack_Gau_rec))
print()
print('AUC Score of the model is {0:.2f}%'.format(100 * stack_Gau_AUC))
print()
print('The Confusion Matrix of the model is :\n', stack_Gau_conf)
print()
print('The Classification Report of the model is :\n', stack_Gau_class)
# Comparing all models: one row per algorithm with its test-set scores (in %)
algo_names = ['Logistic Reg', 'KNN', 'Naive', 'SVM Linear', 'SVM RBF', 'SVM Poly',
              'SVM Sigmoid', 'Decision Tree', 'Bagging', 'AdaBoosting',
              'Gradient Boosting', 'Random Forest', 'Stacking Log Reg',
              'Stacking SVM', 'Stacking KNN', 'Stacking Naive']
# Metric values in the same order as algo_names
acc_vals = [logmod_acc, KNNmod_acc, gaumod_acc, Svmlinear_acc, Svmrbf_acc, Svmpoly_acc,
            Svmsig_acc, dtmod_acc, bgcl_acc, adcl_acc, gbcl_acc, rfmod_acc,
            stack_LGR_acc, stack_SVM_acc, stack_KNN_acc, stack_Gau_acc]
f1_vals = [logmod_F1, KNNmod_F1, gaumod_F1, Svmlinear_F1, Svmrbf_F1, Svmpoly_F1,
           Svmsig_F1, dtmod_F1, bgcl_F1, adcl_F1, gbcl_F1, rfmod_F1,
           stack_LGR_F1, stack_SVM_F1, stack_KNN_F1, stack_Gau_F1]
rec_vals = [logmod_rec, KNNmod_rec, gaumod_rec, Svmlinear_rec, Svmrbf_rec, Svmpoly_rec,
            Svmsig_rec, dtmod_rec, bgcl_rec, adcl_rec, gbcl_rec, rfmod_rec,
            stack_LGR_rec, stack_SVM_rec, stack_KNN_rec, stack_Gau_rec]
df_comp = pd.DataFrame({'Algorithm': algo_names,
                        'Accuracy Score (%)': [v * 100 for v in acc_vals],
                        'F1-score (%)': [v * 100 for v in f1_vals],
                        'Recall Score(%)': [v * 100 for v in rec_vals]})
print("Following table shows comparison of the classification algorithms (using scaled data): ")
df_comp
We can observe from the above results that the "KNN Model" has:
-------- whereas the "Stacking model of Logistic Regression" has:
From the above analysis, the two models perform almost identically, but we should go with the "KNN Model" because its Precision Score — which plays an important role in this analysis — is higher, even though the "Stacking model of Logistic Regression" has higher or similar scores on the other parameters. (Note: the comparison table above reports only Accuracy, F1, and Recall; the Precision Scores referred to here come from the per-model classification reports.)